import pandas as pd
import numpy as np
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import random
import math
import time
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error
import datetime
import operator
# Load the confirmed-cases table. The rest of the script treats it as
# wide-format: identifying columns first, then one column of counts per date.
df = pd.read_csv("time_series_covid19_confirmed_global.csv")
df.head()

# Long-format copy (one row per country per date) used by the plots below.
# Rows are summed per country before melting: if a country spans several rows
# (one per province), leaving them separate gives several points per date and
# the per-country line zig-zags between them.
df1 = (
    df.drop(columns=["Province/State", "Lat", "Long"])
    .groupby("Country/Region", as_index=False, sort=False)
    .sum()
    .melt(id_vars=["Country/Region"], var_name="Date", value_name="Value")
)

# One line per country over the whole date range.
fig = px.line(
    df1,
    x="Date",
    y="Value",
    title='Change in Confirmed cases with time',
    color='Country/Region',
)
fig.show()
# Five countries with the most confirmed cases on the latest date in the data.
# The latest date is taken from the last column rather than hard-coded, so a
# refreshed CSV with newer date columns does not raise KeyError.
latest_date = df.columns[-1]
(
    df[["Country/Region", latest_date]]
    .groupby("Country/Region")
    .sum()
    .reset_index()
    .sort_values(by=latest_date, ascending=False)[:5]
)

# One line trace per country. go.Scatter(mode='lines') replaces the deprecated
# go.Line, and each country's rows are filtered once instead of twice.
fig = go.Figure()
for country in ("US", "Brazil", "Russia", "India", "Peru"):
    country_rows = df1[df1["Country/Region"] == country]
    fig.add_trace(
        go.Scatter(
            x=country_rows["Date"],
            y=country_rows["Value"],
            mode="lines",
            name=country,
        )
    )
fig.update_layout(
    title="Time Series Analysis of (Date and Confirmed Cases) for Countries with Highest Cases",
    xaxis=dict(
        title_text="Date",
        title_font={"size": 20},
        title_standoff=25,
    ),
    yaxis=dict(
        title_text="Confirmed Cases",
        title_font={"size": 20},
        title_standoff=25,
    ),
)
fig.show()
# Five countries with the fewest confirmed cases on the latest date in the
# data. The latest date is taken from the last column rather than hard-coded,
# so a refreshed CSV with newer date columns does not raise KeyError.
latest_date = df.columns[-1]
(
    df[["Country/Region", latest_date]]
    .groupby("Country/Region")
    .sum()
    .reset_index()
    .sort_values(by=latest_date)[:5]
)

# One line trace per country. go.Scatter(mode='lines') replaces the deprecated
# go.Line, and each country's rows are filtered once instead of twice.
fig = go.Figure()
for country in (
    "Western Sahara",
    "Papua New Guinea",
    "MS Zaandam",
    "Holy See",
    "Saint Kitts and Nevis",
):
    country_rows = df1[df1["Country/Region"] == country]
    fig.add_trace(
        go.Scatter(
            x=country_rows["Date"],
            y=country_rows["Value"],
            mode="lines",
            name=country,
        )
    )
fig.update_layout(
    title="Time Series Analysis of (Date and Confirmed Cases) for Countries with Lowest Cases",
    xaxis=dict(
        title_text="Date",
        title_font={"size": 20},
        title_standoff=25,
    ),
    yaxis=dict(
        title_text="Confirmed Cases",
        title_font={"size": 20},
        title_standoff=25,
    ),
)
fig.show()
# Same per-country line chart as the linear version, drawn on a logarithmic
# y-axis so that countries with very different case counts remain readable.
fig = px.line(
    df1,
    x="Date",
    y="Value",
    color='Country/Region',
    title='Log of cases over time for all the countries',
)
fig.update_layout(
    yaxis=dict(type="log", title_text="log(Confirmed Cases)"),
)
fig.show()
# Confirmed cases for the five highest-count countries on a logarithmic
# y-axis. go.Scatter(mode='lines') replaces the deprecated go.Line, and each
# country's rows are filtered once instead of twice.
fig = go.Figure()
for country in ("US", "Brazil", "Russia", "India", "Peru"):
    country_rows = df1[df1["Country/Region"] == country]
    fig.add_trace(
        go.Scatter(
            x=country_rows["Date"],
            y=country_rows["Value"],
            mode="lines",
            name=country,
        )
    )
fig.update_layout(
    title="Log of cases over time for top 5 countries",
    xaxis=dict(
        title_text="Date",
        title_font={"size": 20},
        title_standoff=25,
    ),
    yaxis=dict(
        title_text="log(Confirmed Cases)",
        title_font={"size": 20},
        title_standoff=25,
    ),
)
fig.update_layout(yaxis_type="log")
fig.show()
# Histogram/density of the US confirmed-case values.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases; confirm
# the installed version before relying on it long-term.
f, ax = plt.subplots(figsize=(15, 6))
us_values = df1.loc[df1["Country/Region"] == "US", "Value"]
sns.distplot(us_values, ax=ax)
ax.set_xlabel("Confirmed Cases")
ax.set_ylabel("Days")
plt.show()
# US confirmed cases, drawn twice: once on a linear y-axis and once on a log
# y-axis. The US rows are filtered a single time and shared by both figures;
# go.Scatter(mode='lines') replaces the deprecated go.Line.
us_rows = df1[df1["Country/Region"] == "US"]
for plot_title, y_axis_title, use_log_axis in (
    ("Confirmed Cases in U.S.", "Confirmed Cases", False),
    ("Log of Confirmed Cases in U.S.", "log(Confirmed Cases)", True),
):
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=us_rows["Date"],
            y=us_rows["Value"],
            mode="lines",
            name="US",
        )
    )
    fig.update_layout(
        title=plot_title,
        xaxis=dict(
            title_text="Date",
            title_font={"size": 20},
            title_standoff=25,
        ),
        yaxis=dict(
            title_text=y_axis_title,
            title_font={"size": 20},
            title_standoff=25,
        ),
    )
    if use_log_axis:
        fig.update_layout(yaxis_type="log")
    fig.show()
# India confirmed cases, drawn twice: once on a linear y-axis and once on a
# log y-axis. The India rows are filtered a single time and shared by both
# figures; go.Scatter(mode='lines') replaces the deprecated go.Line.
india_rows = df1[df1["Country/Region"] == "India"]
for plot_title, y_axis_title, use_log_axis in (
    ("Confirmed Cases in India", "Confirmed Cases", False),
    ("Log of Confirmed Cases in India", "log(Confirmed Cases)", True),
):
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=india_rows["Date"],
            y=india_rows["Value"],
            mode="lines",
            name="India",
        )
    )
    fig.update_layout(
        title=plot_title,
        xaxis=dict(
            title_text="Date",
            title_font={"size": 20},
            title_standoff=25,
        ),
        yaxis=dict(
            title_text=y_axis_title,
            title_font={"size": 20},
            title_standoff=25,
        ),
    )
    if use_log_axis:
        fig.update_layout(yaxis_type="log")
    fig.show()
# Spain confirmed cases, drawn twice: once on a linear y-axis and once on a
# log y-axis. The Spain rows are filtered a single time and shared by both
# figures; go.Scatter(mode='lines') replaces the deprecated go.Line.
# Both traces are named 'Spain'; the log figure previously labelled the
# Spain data as 'China'.
spain_rows = df1[df1["Country/Region"] == "Spain"]
for plot_title, y_axis_title, use_log_axis in (
    ("Confirmed Cases in Spain", "Confirmed Cases", False),
    ("Log of Confirmed Cases in Spain", "log(Confirmed Cases)", True),
):
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=spain_rows["Date"],
            y=spain_rows["Value"],
            mode="lines",
            name="Spain",
        )
    )
    fig.update_layout(
        title=plot_title,
        xaxis=dict(
            title_text="Date",
            title_font={"size": 20},
            title_standoff=25,
        ),
        yaxis=dict(
            title_text=y_axis_title,
            title_font={"size": 20},
            title_standoff=25,
        ),
    )
    if use_log_axis:
        fig.update_layout(yaxis_type="log")
    fig.show()
# Per-country totals on the latest date in the data, limited to the 20
# largest. The latest date is taken from the last column rather than
# hard-coded, so a refreshed CSV with newer date columns does not raise
# KeyError.
latest_date = df.columns[-1]
country_tot = (
    df[["Country/Region", latest_date]]
    .groupby("Country/Region")
    .sum()
    .reset_index()
    .sort_values(by=latest_date, ascending=False)[:20]
)

# Horizontal bar chart: one bar per country, bar length = confirmed cases.
fig = go.Figure()
fig.add_trace(
    go.Bar(
        y=country_tot["Country/Region"],
        x=country_tot[latest_date],
        orientation='h',
        marker=dict(
            color='rgba(246, 78, 139, 0.6)',
            line=dict(color='rgba(246, 78, 139, 1.0)', width=2),
        ),
    )
)
fig.update_layout(
    title="Confirmed Cases all over the world",
    xaxis=dict(
        title_text="Cases",
        title_font={"size": 20},
        title_standoff=25,
    ),
    yaxis=dict(
        title_text="Country",
        title_font={"size": 20},
        title_standoff=25,
    ),
)
fig.show()
# Worldwide total per date. sort=False keeps the dates in the order they
# appear in df1; the default sort would order the 'm/d/yy' strings
# lexicographically (e.g. '2/10/20' before '2/2/20') and scramble the bars.
# Only the numeric "Value" column is summed, so the country-name column is
# not aggregated.
# NOTE(review): the values summed here are the same ones plotted as
# "Confirmed Cases" over time above, so the bars show per-date totals rather
# than day-over-day increases; confirm that matches the title's intent.
world_daily = df1.groupby("Date", sort=False)["Value"].sum().reset_index()

fig = go.Figure(
    go.Bar(
        x=world_daily["Date"],
        y=world_daily["Value"],
        orientation='v',
    )
)
fig.update_layout(
    title="World Daily rise in Confirmed Cases",
    xaxis=dict(
        title_text="Date",
        title_font={"size": 20},
        title_standoff=25,
    ),
    yaxis=dict(
        title_text="Cases",
        title_font={"size": 20},
        title_standoff=25,
    ),
)
fig.show()
df.head()

# Date columns: everything from the fifth column of the wide table onward.
columns = df.keys()
confirmed = df.loc[:, columns[4]:columns[-1]]
dates = confirmed.keys()

# Worldwide total per date as an (n_dates, 1) column, plus the matching
# 0-based day index used as the single regression feature.
world_cases = confirmed.sum(axis=0).to_numpy().reshape(-1, 1)
days_since_1_22 = np.arange(len(dates)).reshape(-1, 1)

# Day indices covering the observed range plus the forecast horizon.
days_in_future = 15
future_forcast = np.arange(len(dates) + days_in_future).reshape(-1, 1)
# Observed part of the index. Slicing by len(dates) rather than a literal -15
# keeps this correct if days_in_future is changed (including to 0).
adjusted_dates = future_forcast[:len(dates)]

# Calendar-date label for every index in future_forcast, counted from the
# hard-coded start date.
start = '1/22/2020'
start_date = datetime.datetime.strptime(start, '%m/%d/%Y')
future_forcast_dates = [
    (start_date + datetime.timedelta(days=offset)).strftime('%m/%d/%Y')
    for offset in range(len(future_forcast))
]
# Chronological split (shuffle=False): the last 15% of days form the test set.
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(
    days_since_1_22, world_cases, test_size=0.15, shuffle=False
)

# Ordinary least squares on the day index. The `normalize` argument is no
# longer passed: it was removed from LinearRegression in scikit-learn 1.2
# (TypeError there), and for plain least squares it does not change the
# fitted predictions.
linear_model = LinearRegression(fit_intercept=True)
linear_model.fit(X_train_confirmed, y_train_confirmed)
test_linear_pred = linear_model.predict(X_test_confirmed)
linear_pred = linear_model.predict(future_forcast)

# Metrics are called in the documented (y_true, y_pred) order; MAE and MSE
# are symmetric, so the printed values are unchanged.
print('MAE:', mean_absolute_error(y_test_confirmed, test_linear_pred))
print('MSE:', mean_squared_error(y_test_confirmed, test_linear_pred))
print(linear_model.coef_)
print(linear_model.intercept_)

# Observed totals versus the fitted line extended over the forecast range.
plt.figure(figsize=(20, 12))
plt.plot(adjusted_dates, world_cases)
plt.plot(future_forcast, linear_pred, linestyle='dashed', color='orange')
plt.title('Number of Covid Cases Over Time', size=30)
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('Number of Cases', size=30)
plt.legend(['Confirmed Cases', 'Linear Regression Predictions'])
plt.xticks(size=15)
plt.show()

# Predictions for the days beyond the observed data. The slice starts at the
# number of observed days instead of a literal -15, so it follows the
# forecast horizon if that is changed.
print('Linear regression future predictions:')
print(linear_pred[len(world_cases):])
# Polynomial-kernel support vector regression on the day index.
svm_confirmed = SVR(shrinking=True, kernel='poly', gamma=0.01, epsilon=1, degree=3, C=0.1)
# SVR expects a 1-D target; the training target is an (n, 1) column, so it is
# flattened here to avoid scikit-learn's DataConversionWarning.
svm_confirmed.fit(X_train_confirmed, y_train_confirmed.ravel())
svm_pred = svm_confirmed.predict(future_forcast)

# Check against the held-out test data.
svm_test_pred = svm_confirmed.predict(X_test_confirmed)
plt.figure(figsize=(15, 6))
plt.plot(y_test_confirmed)
plt.plot(svm_test_pred)
plt.legend(['Test Data', 'SVM Predictions'])

# Metrics are called in the documented (y_true, y_pred) order; MAE and MSE
# are symmetric, so the printed values are unchanged.
print('MAE:', mean_absolute_error(y_test_confirmed, svm_test_pred))
print('MSE:', mean_squared_error(y_test_confirmed, svm_test_pred))
# Inputs for the forecast figure: observed series, model predictions, and the
# label/colour used for the prediction line.
x, y = adjusted_dates, world_cases
pred = svm_pred
algo_name, color = 'SVM Predictions', 'purple'

# Observed worldwide totals (solid) against the SVM predictions over the
# extended day range (dashed).
plt.figure(figsize=(15, 8))
plt.plot(x, y)
plt.plot(future_forcast, pred, color=color, linestyle='dashed')

plt.title('Worldwide Coronavirus Cases Over Time', size=30)
plt.ylabel('Number of Cases', size=30)
plt.xlabel('Days Since 1/22/2020', size=30)
plt.legend(['Confirmed Cases', algo_name], prop={'size': 20})

plt.yticks(size=20)
plt.xticks(size=20)
plt.show()